Main Figures
Figures 3, 5 and 6 were constructed manually.
Figure 4: A-domains
Reconstructing the domains
# Figure 4 inputs (pre-computed .Rds objects).
# `domains.species` and `dom.gene.lenght` are indexed by position 1-3 below,
# one element per genome. `cols` is subset by name against the `Class` column,
# so it must be a named vector of colours.
cols <- readRDS(file = "Fig4/Colors_arrow.Rds")
# Loaded here but not referenced by the Figure 4 code in this section.
cols.origin <- readRDS(file = "Fig4/Colors_arrow_origin.Rds")
domains.species <- readRDS("Fig4/domains_species.Rds")
dom.gene.lenght <- readRDS("Fig4/domains_gene_length.Rds")

# Build the domain map for one genome.
#   domains      data frame with columns start, finish, position, Class, Gene
#   gene.lengths data frame with columns end, position (and Gene for faceting)
#   palette      named colour vector; only entries whose name occurs in
#                domains$Class are passed to the colour scale
# Returns a ggplot object with one facet row per gene: a thin baseline from 0
# to the gene end, with each domain drawn on top as a coloured arrow.
plot_nrps_domains <- function(domains, gene.lengths, palette) {
  ggplot(domains, aes(x = 1)) +
    geom_segment(
      data = gene.lengths,
      aes(x = 0, xend = end, y = position, yend = position)
    ) +
    geom_segment(
      aes(x = start, xend = finish, y = position, yend = position, color = Class),
      arrow = arrow(length = unit(0.15, "cm"), type = "closed"),
      size = 2
    ) +
    theme_classic() +
    facet_grid(Gene ~ ., scales = "free") +
    xlab("Position") +
    ylab("NRPS gene") +
    scale_color_manual(values = palette[names(palette) %in% unique(domains$Class)]) +
    theme(
      axis.text.y = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "bottom",
      strip.text.y = element_text(angle = 0),
      axis.line.y = element_blank()
    ) +
    # "none" is the supported spelling; passing FALSE to guides() is deprecated.
    guides(fill = "none")
}

bashet.nrps <- plot_nrps_domains(domains.species[[1]], dom.gene.lenght[[1]], cols)
basmer.jgi.nrps <- plot_nrps_domains(domains.species[[2]], dom.gene.lenght[[2]], cols)
basmer.b9252.nrps <- plot_nrps_domains(domains.species[[3]], dom.gene.lenght[[3]], cols)
# Plot
grid.arrange(bashet.nrps, basmer.jgi.nrps, basmer.b9252.nrps)

Figure 7: HGT counts
# Figure 7 input: one row per donor lineage (`Origin`) and one count column
# per genome (bashet, basmer_B9252, basmer_HGT).
HGT.counts <- read.table("Fig7/HGT_counts.txt", sep = "\t", header = TRUE)
# Fix the lineage order explicitly; it drives both the sorted tables printed
# below and the order in which fill colours are assigned in the plot.
HGT.counts$Origin <- factor(
  HGT.counts$Origin,
  levels = c(
    "a-proteobacteria", "b-proteobacteria", "d-proteobacteria",
    "e-proteobacteria", "g-proteobacteria", "proteobacteria", "firmicutes",
    "actinobacteria", "high GC Gram+", "enterobacteria", "planctomycetes",
    "CFB group bacteria", "GNS bacteria", "verrucomicrobia", "fusobacteria",
    "cyanobacteria", "chlamydias", "mycoplasmas", "aquificales", "bacteria",
    "euryarchaeotes", "archaea"
  )
)
# Per-genome divisors used to turn counts into proportions.
# NOTE(review): presumably the total number of genes in each genome; confirm
# where these values come from.
hgt.denominators <- c(bashet = 9331, basmer_B9252 = 13273, basmer_HGT = 16111)
HGT.counts.prop <- HGT.counts
HGT.counts.prop$bashet <- HGT.counts$bashet / hgt.denominators[["bashet"]]
HGT.counts.prop$basmer_B9252 <- HGT.counts$basmer_B9252 / hgt.denominators[["basmer_B9252"]]
HGT.counts.prop$basmer_HGT <- HGT.counts$basmer_HGT / hgt.denominators[["basmer_HGT"]]
# Print the raw counts in factor-level order (a single sort key is enough).
HGT.counts[order(HGT.counts$Origin), ]
## Origin bashet basmer_B9252 basmer_HGT
## 1 a-proteobacteria 21 33 26
## 5 b-proteobacteria 21 31 24
## 10 d-proteobacteria 23 40 39
## 11 e-proteobacteria 2 1 4
## 16 g-proteobacteria 27 54 50
## 21 proteobacteria 1 1 2
## 14 firmicutes 54 87 54
## 2 actinobacteria 1 4 1
## 18 high GC Gram+ 39 63 68
## 12 enterobacteria 2 10 8
## 20 planctomycetes 2 6 15
## 7 CFB group bacteria 33 60 36
## 17 GNS bacteria 12 28 51
## 22 verrucomicrobia 5 8 4
## 15 fusobacteria 2 NA NA
## 9 cyanobacteria 42 51 39
## 8 chlamydias 1 NA NA
## 19 mycoplasmas 1 NA NA
## 3 aquificales 1 NA 1
## 6 bacteria 6 21 16
## 13 euryarchaeotes 4 4 3
## 4 archaea 1 1 1
# Print the normalized table in factor-level order. The sort key is taken from
# the table being printed; the former second key came from `HGT.counts` and
# repeated the same column, so it never affected the ordering.
HGT.counts.prop[order(HGT.counts.prop$Origin), ]
## Origin bashet basmer_B9252 basmer_HGT
## 1 a-proteobacteria 0.0022505626 2.486250e-03 1.613804e-03
## 5 b-proteobacteria 0.0022505626 2.335568e-03 1.489665e-03
## 10 d-proteobacteria 0.0024649019 3.013637e-03 2.420706e-03
## 11 e-proteobacteria 0.0002143393 7.534092e-05 2.482776e-04
## 16 g-proteobacteria 0.0028935805 4.068410e-03 3.103470e-03
## 21 proteobacteria 0.0001071696 7.534092e-05 1.241388e-04
## 14 firmicutes 0.0057871611 6.554660e-03 3.351747e-03
## 2 actinobacteria 0.0001071696 3.013637e-04 6.206939e-05
## 18 high GC Gram+ 0.0041796163 4.746478e-03 4.220719e-03
## 12 enterobacteria 0.0002143393 7.534092e-04 4.965551e-04
## 20 planctomycetes 0.0002143393 4.520455e-04 9.310409e-04
## 7 CFB group bacteria 0.0035365984 4.520455e-03 2.234498e-03
## 17 GNS bacteria 0.0012860358 2.109546e-03 3.165539e-03
## 22 verrucomicrobia 0.0005358482 6.027273e-04 2.482776e-04
## 15 fusobacteria 0.0002143393 NA NA
## 9 cyanobacteria 0.0045011253 3.842387e-03 2.420706e-03
## 8 chlamydias 0.0001071696 NA NA
## 19 mycoplasmas 0.0001071696 NA NA
## 3 aquificales 0.0001071696 NA 6.206939e-05
## 6 bacteria 0.0006430179 1.582159e-03 9.931103e-04
## 13 euryarchaeotes 0.0004286786 3.013637e-04 1.862082e-04
## 4 archaea 0.0001071696 7.534092e-05 6.206939e-05
# Long format: one row per (Origin, genome) pair, genome in `variable` and the
# proportion in `value`.
HGT.counts.m <- melt(HGT.counts.prop)
## Using Origin as id variables
# Plot
# NOTE(review): position = "fill" rescales every bar to a total of 1, so the
# per-genome divisors applied to HGT.counts.prop cancel out here; the bars show
# the composition of HGT origins within each genome. Confirm this is intended.
# Fill colours are unnamed and therefore assigned positionally, in the order of
# the `Origin` factor levels.
# theme_bw() was dropped: theme_classic() is a complete theme and replaced it.
ggplot(HGT.counts.m, aes(x = variable, y = value, fill = Origin)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_fill_manual(values = c(
    "#FFAAAA", "#E37B7B", "#D46A6A", "#801515", "#550000", "#2B0000",
    "#ED8229", "#03D5D5", "#198E8E", "#674A33", "#E9038F", "#FFFF04",
    "#ED5DBA", "#72335B", "#7F0855", "#08AF13", "#AA5704", "#73239F",
    "#1CA06E", "#221617", "#736058", "#4E4E4E"
  )) +
  xlab("Taxon") +
  ylab("Normalized proportion of genes with HGT evidence ") +
  theme_classic()
## Warning: Removed 7 rows containing missing values (position_stack).

Supplementary figures
Sup. Figs 2 and 11 were constructed by hand
Sup. Figure 1: NRPS tree
zygo_tree <- read.tree("SF1/NRPS.tre")
# Reading assignments from Bushley 2010
table.names <- read.table("SF1/Bushley_names.txt", sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# Assigning names: look up each tip label in the Bushley table to get its
# group. The `group` column is not referenced again in this section.
alin.names.org <- data.frame(
  names = zygo_tree$tip.label,
  group = table.names$Group[match(zygo_tree$tip.label, table.names$name.or.i.)],
  stringsAsFactors = FALSE
)
# Names_step1: keep the first two "_"-separated fields of each tip label,
# joined by a space. vapply() guarantees exactly one string per label.
names.step1 <- strsplit(alin.names.org$names, split = "_") %>%
  vapply(function(x) paste(x[1], x[2]), character(1))
# Labels containing "/" keep only their first space-separated field.
names.step1[grep("/", names.step1)] <- grep("/", names.step1, value = TRUE) %>%
  strsplit(split = " ") %>%
  vapply(function(x) x[1], character(1))
# A trailing " 1" / " 2" becomes "_1" / "_2"; any other trailing number
# becomes "_JGI".
names.step1 <- gsub(names.step1, pattern = " 1$", replacement = "_1") %>%
  gsub(pattern = " 2$", replacement = "_2") %>%
  gsub(perl = TRUE, pattern = " \\d+$", replacement = "_JGI")
names_NRPS <- data.frame(tip.label = zygo_tree$tip.label, file = names.step1, stringsAsFactors = FALSE)
# Separating by genus: first space-separated field, with a "jgi|" prefix
# removed, then everything before the first "|".
names_NRPS$file <- strsplit(names_NRPS$file, split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1)) %>%
  gsub(pattern = "jgi\\|", replacement = "") %>%
  strsplit(split = "|", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# The same token is used both as a JGI file code and as a genus name; the
# merges further down try each interpretation in turn.
names_NRPS$genus <- names_NRPS$file
# JGI names
all.jgi <- read.csv("JGI_code_taxonomy.csv")
head(all.jgi)
## Phylum_name Subphylum_name Class_name Order_name
## 1 Ascomycota Pezizomycotina Dothideomycetes Pleosporales
## 2 Basidiomycota Agaricomycotina Agaricomycetes Polyporales
## 3 Basidiomycota Agaricomycotina Agaricomycetes Polyporales
## 4 Mucoromycota Mucoromycotina Incertae_sedis Mucorales
## 5 Mucoromycota Mucoromycotina Incertae_sedis Mucorales
## 6 Basidiomycota Ustilaginomycotina Exobasidiomycetes Exobasidiales
## Family_name file Species
## 1 Dacampiaceae Aaoar1 Aaosphaeria arxii CBS 175.79 v1.0
## 2 Meruliaceae Abobi1 Abortiporus biennis CCBS 521 v1.0
## 3 Meruliaceae Abobie1 Abortiporus biennis CIRM-BRFM1778 v1.0
## 4 Cunninghamellaceae Chlpad1 Absidia padenii NRRL 2977 v1.0
## 5 Cunninghamellaceae Absrep1 Absidia repens NRRL 1336 v1.0
## 6 Cryptobasidiaceae Acain1 Acaromyces ingoldii MCA 4198 v1.0
# Genus = first word of the species name. vapply() yields a plain character
# column; the previous lapply() result was stored as a list column.
all.jgi$genus <- strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# Merging the names
# Match tips to the JGI table first by file code, then by genus, keeping only
# the tip label and its phylum (selected by name rather than by position).
zygo.tree <- merge(names_NRPS, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
fungi.tree <- merge(names_NRPS, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
# A genus matches every JGI entry of that genus, so collapse repeated rows.
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(fungi.tree, zygo.tree)
# Drop the "Basme2finSC" tips before re-adding them under the "BasMer" label.
# The filter is computed on fungi.all's own tip labels: row positions taken
# from names_NRPS do not correspond to rows of the merged table.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N161|Basme2finSC", names_NRPS$tip.label, value = TRUE),
    "Phylum_name" = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N168", names_NRPS$tip.label, value = TRUE),
    "Phylum_name" = "BasHet"
  )
)
# Plotting the tree
zygo_fort <- fortify(zygo_tree)
# Support values are read from the tree's node labels and written to the
# non-tip rows; labels that are not numeric become NA (see the warning below).
# NOTE(review): this assumes the non-tip rows of the fortified table are in the
# same order as `node.label` - confirm.
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree[["node.label"]])
## Warning: NAs introduced by coercion
# Blank out support values below 70 so that they are not printed.
zygo_fort[["bootstrap"]][which(zygo_fort[["bootstrap"]] < 70)] <- NA
# Attach the phylum annotation to the tree data, colour tip labels by phylum
# and print the remaining support values next to their nodes.
ggtree(zygo_fort, size = 0.3) %<+% fungi.all +
  geom_tiplab(size = 1, aes(color = Phylum_name)) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 1461 rows containing missing values (geom_text).

Figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and legend, and extend root.
Sup. Figure 3: PKS tree
zygo_tree <- read.tree("SF3/PKS.tre")
# Assigning names
alin.names.org <- data.frame(names = zygo_tree$tip.label, stringsAsFactors = FALSE)
# Names_step1: keep the first two "_"-separated fields of each tip label,
# joined by a space. vapply() guarantees exactly one string per label.
names.step1 <- strsplit(alin.names.org$names, split = "_") %>%
  vapply(function(x) paste(x[1], x[2]), character(1))
# Labels containing "/" keep only their first space-separated field.
names.step1[grep("/", names.step1)] <- grep("/", names.step1, value = TRUE) %>%
  strsplit(split = " ") %>%
  vapply(function(x) x[1], character(1))
# A trailing " 1" / " 2" becomes "_1" / "_2"; any other trailing number
# becomes "_JGI".
names.step1 <- gsub(names.step1, pattern = " 1$", replacement = "_1") %>%
  gsub(pattern = " 2$", replacement = "_2") %>%
  gsub(perl = TRUE, pattern = " \\d+$", replacement = "_JGI")
# The name `names_NRPS` is kept from the NRPS section; here it holds the
# labels of the PKS tree.
names_NRPS <- data.frame(tip.label = zygo_tree$tip.label, file = names.step1, stringsAsFactors = FALSE)
# Separating by genus: first space-separated field, with a "jgi|" prefix
# removed, then everything before the first "|".
names_NRPS$file <- strsplit(names_NRPS$file, split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1)) %>%
  gsub(pattern = "jgi\\|", replacement = "") %>%
  strsplit(split = "|", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# The same token is used both as a JGI file code and as a genus name.
names_NRPS$genus <- names_NRPS$file
# JGI names
all.jgi <- read.csv("JGI_code_taxonomy.csv")
# Genus = first word of the species name. vapply() yields a plain character
# column; the previous lapply() result was stored as a list column.
all.jgi$genus <- strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# Merging the names
# Match tips to the JGI table first by file code, then by genus, keeping only
# the tip label and its phylum (selected by name rather than by position).
zygo.tree <- merge(names_NRPS, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
fungi.tree <- merge(names_NRPS, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
# A genus matches every JGI entry of that genus, so collapse repeated rows
# (same treatment as in the NRPS tree section).
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(zygo.tree, fungi.tree)
# Drop the "Basme2finSC" tips before re-adding them under the "BasMer" label.
# The filter is computed on fungi.all's own tip labels: row positions taken
# from names_NRPS do not correspond to rows of the merged table.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N161|Basme2finSC", names_NRPS$tip.label, value = TRUE),
    "Phylum_name" = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N168", names_NRPS$tip.label, value = TRUE),
    "Phylum_name" = "BasHet"
  )
)
# Plotting the tree
zygo_fort <- fortify(zygo_tree)
# Support values are read from the tree's node labels and written to the
# non-tip rows; labels that are not numeric become NA (see the warning below).
# NOTE(review): this assumes the non-tip rows of the fortified table are in the
# same order as `node.label` - confirm.
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree[["node.label"]])
## Warning: NAs introduced by coercion
# Blank out support values below 70 so that they are not printed.
zygo_fort[["bootstrap"]][which(zygo_fort[["bootstrap"]] < 70)] <- NA
# Attach the phylum annotation to the tree data, colour tip labels by phylum
# and print the remaining support values next to their nodes.
ggtree(zygo_fort, size = 0.3) %<+% fungi.all +
  geom_tiplab(size = 1, aes(color = Phylum_name)) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 650 rows containing missing values (geom_text).

The figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and the legend, and extend the root.
Sup. Figure 6: TC tree
zygo_tree <- read.tree("SF6/TC.tree")
# Assigning names
alin.names.org <- data.frame(names = zygo_tree$tip.label, stringsAsFactors = FALSE)
# Names_step1: keep the first two "_"-separated fields of each tip label,
# joined by a space. vapply() guarantees exactly one string per label.
names.step1 <- strsplit(alin.names.org$names, split = "_") %>%
  vapply(function(x) paste(x[1], x[2]), character(1))
# Labels containing "/" keep only their first space-separated field.
names.step1[grep("/", names.step1)] <- grep("/", names.step1, value = TRUE) %>%
  strsplit(split = " ") %>%
  vapply(function(x) x[1], character(1))
# A trailing " 1" / " 2" becomes "_1" / "_2"; any other trailing number
# becomes "_JGI".
names.step1 <- gsub(names.step1, pattern = " 1$", replacement = "_1") %>%
  gsub(pattern = " 2$", replacement = "_2") %>%
  gsub(perl = TRUE, pattern = " \\d+$", replacement = "_JGI")
names_TC <- data.frame(tip.label = zygo_tree$tip.label, file = names.step1, stringsAsFactors = FALSE)
# Separating by genus: first space-separated field, with a "jgi|" prefix
# removed, then everything before the first "|".
names_TC$file <- strsplit(names_TC$file, split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1)) %>%
  gsub(pattern = "jgi\\|", replacement = "") %>%
  strsplit(split = "|", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# The same token is used both as a JGI file code and as a genus name.
names_TC$genus <- names_TC$file
# JGI names
all.jgi <- read.csv("JGI_code_taxonomy.csv")
# Genus = first word of the species name. vapply() yields a plain character
# column; the previous lapply() result was stored as a list column.
all.jgi$genus <- strsplit(as.character(all.jgi$Species), split = " ", fixed = TRUE) %>%
  vapply(function(x) x[1], character(1))
# Merging the names
# Match tips to the JGI table first by file code, then by genus, keeping only
# the tip label and its phylum (selected by name rather than by position).
zygo.tree <- merge(names_TC, all.jgi, by = "file", no.dups = TRUE, sort = FALSE)
zygo.tree <- zygo.tree[c("tip.label", "Phylum_name")]
fungi.tree <- merge(names_TC, all.jgi, by = "genus", no.dups = TRUE, sort = FALSE)
fungi.tree <- fungi.tree[c("tip.label", "Phylum_name")]
# A genus matches every JGI entry of that genus, so collapse repeated rows
# (same treatment as in the NRPS tree section).
fungi.tree <- fungi.tree[!duplicated(fungi.tree), ]
fungi.all <- rbind(zygo.tree, fungi.tree)
# Drop the "Basme2finSC" tips before re-adding them under the "BasMer" label.
# The filter is computed on fungi.all's own tip labels: row positions taken
# from names_TC do not correspond to rows of the merged table.
fungi.all <- fungi.all[!grepl("Basme2finSC", fungi.all$tip.label), ]
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N161|Basme2finSC", names_TC$tip.label, value = TRUE),
    "Phylum_name" = "BasMer"
  )
)
fungi.all <- rbind(
  fungi.all,
  data.frame(
    "tip.label" = grep("N168", names_TC$tip.label, value = TRUE),
    "Phylum_name" = "BasHet"
  )
)
# Plotting the tree
zygo_fort <- fortify(zygo_tree)
# Support values are read from the tree's node labels and written to the
# non-tip rows; labels that are not numeric become NA (see the warning below).
# NOTE(review): this assumes the non-tip rows of the fortified table are in the
# same order as `node.label` - confirm.
zygo_fort[["bootstrap"]] <- NA
zygo_fort[["bootstrap"]][!zygo_fort[["isTip"]]] <- as.numeric(zygo_tree[["node.label"]])
## Warning: NAs introduced by coercion
# Blank out support values below 70 so that they are not printed.
zygo_fort[["bootstrap"]][which(zygo_fort[["bootstrap"]] < 70)] <- NA
# Attach the phylum annotation to the tree data, colour tip labels by phylum
# and print the remaining support values next to their nodes.
ggtree(zygo_fort, size = 0.3) %<+% fungi.all +
  geom_tiplab(size = 1, aes(color = Phylum_name)) +
  geom_text(aes(label = bootstrap), vjust = -0.5, hjust = -0.5, size = 1) +
  theme(legend.position = "bottom")
## Warning: Removed 1609 rows containing missing values (geom_text).

The figure was finalized by hand in Adobe Illustrator 2020 to color the missing tip labels, improve legibility and the legend, and extend the root.
Sup. Figure 7: HGT assay
# Inputs for scaffold jcf7180000803233: a per-position table (Pos, Cov), a
# z-score table (Pos, zscore) and a gene table (Start, End, Origin).
jcf7180000803233 <- readRDS(file = "SF7/jcf7180000803233_HGTplot.Rds")
chom.sub.all <- readRDS(file = "SF7/jcf7180000803233_Zscores.Rds")
GFF.chrom <- readRDS(file = "SF7/GFF_chrom.jcf7180000803233.Rds")
# Coverage plot: Cov along the scaffold as a line, with the genes drawn as
# arrows at y = -1 and coloured by Origin.
cov.plot <- ggplot(data = jcf7180000803233, mapping = aes(x = Pos, y = Cov)) +
  geom_line() +
  geom_segment(
    data = GFF.chrom,
    mapping = aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
    arrow = arrow(length = unit(0.1, "inches"), type = "closed"),
    size = 1,
    lineend = "butt",
    linejoin = "mitre"
  ) +
  scale_y_continuous(breaks = seq(from = 0, to = 1500, by = 100)) +
  theme_classic() +
  theme(legend.position = "bottom")
# Z-score plot: one point per position, reference lines at +/-1 (green) and
# +/-2 (red), and the genes drawn as thick segments at y = -1.
zscore.plot <- ggplot(data = chom.sub.all, mapping = aes(x = Pos, y = zscore)) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = c(-1, 1), color = "forestgreen") +
  geom_hline(yintercept = c(-2, 2), color = "red") +
  geom_segment(
    data = GFF.chrom,
    mapping = aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
    size = 3
  ) +
  theme_bw()
# Plot: stack the two panels vertically.
ggarrange(cov.plot, zscore.plot, nrow = 2, labels = c("Coverage", "Zscore"))

# Inputs for scaffold jcf7180000797043: a per-position table (Pos, Cov), a
# z-score table (Pos, zscore) and a gene table (Start, End, Origin).
# `chom.sub.all`, `GFF.chrom`, `cov.plot` and `zscore.plot` are reassigned here
# and replace the objects built for the previous scaffold.
jcf7180000797043 <- readRDS(file = "SF7/jcf7180000797043_HGTplot.Rds")
chom.sub.all <- readRDS(file = "SF7/jcf7180000797043_Zscores.Rds")
GFF.chrom <- readRDS(file = "SF7/GFF_chrom.jcf7180000797043.Rds")
# Coverage plot: Cov along the scaffold as a line, with the genes drawn as
# arrows at y = -1 and coloured by Origin.
cov.plot <- ggplot(data = jcf7180000797043, mapping = aes(x = Pos, y = Cov)) +
  geom_line() +
  geom_segment(
    data = GFF.chrom,
    mapping = aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
    arrow = arrow(length = unit(0.1, "inches"), type = "closed"),
    size = 1,
    lineend = "butt",
    linejoin = "mitre"
  ) +
  scale_y_continuous(breaks = seq(from = 0, to = 1500, by = 100)) +
  theme_classic() +
  theme(legend.position = "bottom")
# Z-score plot: one point per position, reference lines at +/-1 (green) and
# +/-2 (red), and the genes drawn as thick segments at y = -1.
zscore.plot <- ggplot(data = chom.sub.all, mapping = aes(x = Pos, y = zscore)) +
  geom_point(size = 0.5) +
  geom_hline(yintercept = c(-1, 1), color = "forestgreen") +
  geom_hline(yintercept = c(-2, 2), color = "red") +
  geom_segment(
    data = GFF.chrom,
    mapping = aes(x = Start, xend = End, y = -1, yend = -1, color = Origin),
    size = 3
  ) +
  theme_bw()
# Plot: stack the two panels vertically.
ggarrange(cov.plot, zscore.plot, nrow = 2, labels = c("Coverage", "Zscore"))

Figure was finalized by hand in Adobe Illustrator 2020